#####################################################################
# Paper: Voluntary Practice Tests improve Exam Performance
# Last Update: 22.02.2026
# Replication code
#####################################################################

# import packages
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from econml.dml import CausalForestDML
from econml.cate_interpreter import SingleTreeCateInterpreter


########################################################
# DATA SUMMARIES
########################################################

# load alldata
alldata = pd.read_json(r"C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\raw\alldata_cleaned.json")
# drop students who did not attend
alldata = alldata.dropna(subset=['points_wo_bonus', 'Total', 'high_school_average', 'average_grade_pre',
                       'average_grade_quant_pre', 'Antritt', 'exam_semester', 'ECTS_passed_pre',
                       'ECTS_passed_quant_pre', 'ECTS_econ_of_all_pre', 'ECTS_same_semester', 'Age', 'female',
                       'Sportökonomie', 'Recht_Wirtschaft', 'Rechtswissenschaft'])

# plotting parameters
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman']

### Figure 10: Distribution of bonus points
fig, ax = plt.subplots(figsize=(8, 4), dpi=300)
ax.hist(alldata["Total"], bins=13, color='#4A4A4A', edgecolor='white', linewidth=0.5, alpha=0.85)
ax.set_xlabel('Bonus Points', fontsize=14, fontfamily='Times New Roman')
ax.set_ylabel('Frequency', fontsize=14, fontfamily='Times New Roman')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.5)
ax.set_axisbelow(True)
ax.tick_params(axis='both', which='major', labelsize=14, length=4, width=0.8)  # Increased from 8 to 14
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontfamily('Times New Roman')
plt.tight_layout()
# Save in publication formats
plt.savefig(r"C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\histogram_bonus_points_paper.pdf", dpi=300, bbox_inches='tight')
plt.show()


# Figure 1: Distributional Differences
fig, ax = plt.subplots(figsize=(8, 3), dpi=300)
untreated_data = alldata.query("bonus_4==0")["points_wo_bonus"].dropna()
treated_data = alldata.query("bonus_4==1")["points_wo_bonus"].dropna()
kde_untreated = stats.gaussian_kde(untreated_data)
kde_treated = stats.gaussian_kde(treated_data)
x_min = min(untreated_data.min(), treated_data.min())
x_max = max(untreated_data.max(), treated_data.max())
x_range = np.linspace(x_min, x_max, 300)
ax.plot(x_range, kde_untreated(x_range), color='#808080', linewidth=2.5, label='Untreated')
ax.plot(x_range, kde_treated(x_range), color='#404040', linewidth=2.5, label='Treated')
ax.axvline(32.915049, color='#808080', linestyle='--', linewidth=2, alpha=1)
ax.axvline(51.848586, color='#404040', linestyle='--', linewidth=2, alpha=1)
ax.set_xlabel('Exam Points w/o Bonus Points', fontsize=14, fontfamily='Times New Roman')
ax.set_ylabel('Density', fontsize=14, fontfamily='Times New Roman')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.5)
ax.set_axisbelow(True)
ax.tick_params(axis='both', which='major', labelsize=14, length=4, width=0.8)
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontfamily('Times New Roman')
ax.legend(fontsize=12, frameon=False, prop={'family': 'Times New Roman'})
plt.tight_layout()
plt.savefig(r"C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\density_comparison_paper.pdf", dpi=300, bbox_inches='tight')
plt.show()

# difference
print("Difference points:", 51.848586-32.915049)
print("Difference %:", (51.848586-32.915049)/90)


### plot overlap
probit1 = smf.probit(formula='bonus_1 ~ high_school_average+average_grade_pre+average_grade_quant_pre+Antritt+exam_semester+ECTS_passed_pre+ECTS_passed_quant_pre+ECTS_econ_of_all_pre+ECTS_same_semester+Age+female+C(Exam)+Sportökonomie+Recht_Wirtschaft+Rechtswissenschaft', data=alldata).fit(cov_type='HC3')
probit2 = smf.probit(formula='bonus_4 ~ high_school_average+average_grade_pre+average_grade_quant_pre+Antritt+exam_semester+ECTS_passed_pre+ECTS_passed_quant_pre+ECTS_econ_of_all_pre+ECTS_same_semester+Age+female+C(Exam)+Sportökonomie+Recht_Wirtschaft+Rechtswissenschaft', data=alldata).fit(cov_type='HC3')
probit3 = smf.probit(formula='bonus_8 ~ high_school_average+average_grade_pre+average_grade_quant_pre+Antritt+exam_semester+ECTS_passed_pre+ECTS_passed_quant_pre+ECTS_econ_of_all_pre+ECTS_same_semester+Age+female+C(Exam)+Sportökonomie+Recht_Wirtschaft+Rechtswissenschaft', data=alldata).fit(cov_type='HC3')
print(summary_col([probit1,probit2,probit3],stars=True,float_format='%0.2f',info_dict={'N': lambda x: f"{int(x.nobs)}"}))
# predict propensity scores
alldata["ps_probit_1"] = probit1.predict(alldata)
alldata["ps_probit_4"] = probit2.predict(alldata)
alldata["ps_probit_8"] = probit3.predict(alldata)


# Figure 11: overlap assessment of bonus_1, bonus_4, and bonus_8
fig, axes = plt.subplots(1, 3, figsize=(8, 4), dpi=300)
bonus_numbers = [1, 4, 8]
all_kde_max = []
kde_data = []
for bonus_num in bonus_numbers:
    untreated_data = alldata.query(f"bonus_{bonus_num}==0")[f"ps_probit_{bonus_num}"].dropna()
    treated_data = alldata.query(f"bonus_{bonus_num}==1")[f"ps_probit_{bonus_num}"].dropna()
    kde_untreated = stats.gaussian_kde(untreated_data)
    kde_treated = stats.gaussian_kde(treated_data)
    x_min = min(untreated_data.min(), treated_data.min())
    x_max = max(untreated_data.max(), treated_data.max())
    x_range = np.linspace(x_min, x_max, 300)
    kde_untreated_vals = kde_untreated(x_range)
    kde_treated_vals = kde_treated(x_range)
    all_kde_max.append(max(kde_untreated_vals.max(), kde_treated_vals.max()))
    kde_data.append((x_range, kde_untreated_vals, kde_treated_vals))
global_ymax = max(all_kde_max)
for idx, bonus_num in enumerate(bonus_numbers):
    ax = axes[idx]
    x_range, kde_untreated_vals, kde_treated_vals = kde_data[idx]
    ax.plot(x_range, kde_untreated_vals, color='#808080', linewidth=2, label='Non Treated')
    ax.plot(x_range, kde_treated_vals, color='#404040', linewidth=2, label='Treated')
    ax.set_xlabel('Propensity Score', fontsize=11, fontfamily='Times New Roman')
    if idx == 0:
        ax.set_ylabel('Density', fontsize=11, fontfamily='Times New Roman')
    ax.set_title(f'Bonus {bonus_num}', fontsize=11, fontfamily='Times New Roman', pad=10)
    ax.set_ylim(0, global_ymax * 1.05)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.5)
    ax.set_axisbelow(True)
    ax.tick_params(axis='both', which='major', labelsize=9, length=3, width=0.6)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontfamily('Times New Roman')
    if idx == 2:
        ax.legend(fontsize=10, frameon=False, prop={'family': 'Times New Roman'}, loc='upper right',bbox_to_anchor=(1.15, 1))
plt.tight_layout(pad=3.0, w_pad=4.0)
plt.savefig(r"C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\assessment_overlap_assumption.pdf", dpi=300, bbox_inches='tight')
plt.show()


########################################################
# Causal Forest
########################################################

# select treatment
treatment = 'bonus_4'
# plotting parameters
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman']
# load alldata
df = pd.read_json(r"C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\raw\alldata_cleaned.json")
# drop students who did not attend
df = df.dropna(subset=['points_wo_bonus', 'Total', 'high_school_average', 'average_grade_pre',
                       'average_grade_quant_pre', 'Antritt', 'exam_semester', 'ECTS_passed_pre',
                       'ECTS_passed_quant_pre', 'ECTS_econ_of_all_pre', 'ECTS_same_semester', 'Age', 'female',
                       'Sportökonomie', 'Recht_Wirtschaft', 'Rechtswissenschaft', 'Exam', treatment])
# create dummies
df_dummies = pd.get_dummies(df[["Exam"]], drop_first = True, dtype=int)
df = pd.concat([df, df_dummies], axis =1)
X = ["high_school_average","average_grade_pre","average_grade_quant_pre","Antritt","exam_semester","ECTS_passed_pre","ECTS_passed_quant_pre", "ECTS_econ_of_all_pre", "ECTS_same_semester","Age","Sportökonomie","Recht_Wirtschaft","Rechtswissenschaft"] + list(df_dummies.columns)

# causal forest
np.random.seed(1234)
forest_model = CausalForestDML(max_depth=3,
                               min_samples_leaf=30,
                               n_estimators = 300,
                               honest=True)
forest_model = forest_model.fit(Y=df["points_wo_bonus"], X=df[X], T=df[treatment])

# Figure 2: Illustration of regression tree
intrp = SingleTreeCateInterpreter(max_depth=2, min_samples_leaf=30).interpret(forest_model, df[X])
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
intrp.plot(feature_names=X, fontsize=14, ax=ax, filled=False)
plt.tight_layout()
# plt.savefig(r'C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\cate_tree_interpretation.pdf', bbox_inches='tight',facecolor='white', edgecolor='none')
plt.show()

# Figure 7: feature importance derived from the honest causal forest
label_map = {
    'Total': 'Number Bon. Points',
    'high_school_average': 'High School Grade Avg.',
    'average_grade_pre': 'Uni. Grade Avg. (pre)',
    'average_grade_quant_pre': 'Uni. Grade Avg. Quant. (pre)',
    'exam_semester': 'Exam Semester',
    'Antritt': 'Exam Attempt',
    'ECTS_passed_pre': 'ECTS Collected (pre)',
    'ECTS_passed_quant_pre': 'Quant. ECTS Collected (pre)',
    'ECTS_econ_of_all_pre': 'Share of Econ ECTS (pre)',
    'ECTS_same_semester': 'ECTS Collected Same Sem.',
    'Age': 'Age (Years)',
    'female': 'Female (=1)',
    'Sportökonomie': 'Study Prog.: Sport & Manag.',
    'Recht_Wirtschaft': 'Study Prog.: Law & Econ',
    'Rechtswissenschaft': 'Study Prog.: Law'
}
fig, ax = plt.subplots(figsize=(8, 4), dpi=300)
sns.barplot(x=X, y=forest_model.feature_importances(), color='grey', ax=ax)
ax.set_ylabel('Importance', fontsize=14)
ax.set_xlabel('')
ax.set_xticklabels([label_map.get(label.get_text(), label.get_text()) for label in ax.get_xticklabels()], rotation=35, ha="right", fontsize=9)
ax.tick_params(axis='y', labelsize=12)
plt.tight_layout()
plt.savefig(r'C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\causal_forest_feature_importance.pdf', bbox_inches='tight',facecolor='white', edgecolor='none')
plt.show()


### plot heterogeneity

# Figure 8: Treatment effect heterogeneity as a function of students' university grade average
var = 'average_grade_pre'
# feature heterogenous treatment effect
def compute_var_effect(df, hte_model, var):
    df_var = df.copy()
    df_var[["high_school_average","average_grade_pre","average_grade_quant_pre","Antritt","exam_semester","ECTS_passed_pre","ECTS_passed_quant_pre", "ECTS_econ_of_all", "ECTS_same_semester","Age","Sportökonomie","Recht_Wirtschaft","Rechtswissenschaft"] + list(df_dummies.columns)] = 0
    df_var[var] = df[var]

    df_wo_var = df.copy()
    df_wo_var[var] = 0

    df_var['predicted'] = hte_model.effect(df_var[X])
    df_wo_var['predicted'] = hte_model.effect(df_wo_var[X]).mean()

    df['hte_predicted'] = df_var['predicted'] #+ df_wo_var['predicted']
    return df
df = compute_var_effect(df, forest_model, var)
# plot
fig, ax = plt.subplots(figsize=(10, 4), dpi=300)
sns.scatterplot(x=var, y='hte_predicted', data=df, color='grey', alpha=1, s=30, ax=ax)
ax.set_ylabel('Predicted Treatment Effect', fontsize=16)
ax.set_xlabel(label_map.get(var, var), fontsize=16)
ax.tick_params(axis='both', labelsize=11)
plt.tight_layout()
plt.savefig(r'C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\heterogeneous_treatment_effects_average_grade_pre.pdf', bbox_inches='tight',facecolor='white', edgecolor='none')
plt.show()


# Figure 9: Treatment effect heterogeneity as a function of students' ECTS_same_semester
var = 'ECTS_same_semester'
# feature heterogenous treatment effect
def compute_var_effect(df, hte_model, var):
    df_var = df.copy()
    df_var[["high_school_average","average_grade_pre","average_grade_quant_pre","Antritt","exam_semester","ECTS_passed_pre","ECTS_passed_quant_pre","ECTS_same_semester","Age","Sportökonomie","Recht_Wirtschaft","Rechtswissenschaft"] + list(df_dummies.columns)] = 0
    df_var[var] = df[var]

    df_wo_var = df.copy()
    df_wo_var[var] = 0

    df_var['predicted'] = hte_model.effect(df_var[X])
    df_wo_var['predicted'] = hte_model.effect(df_wo_var[X]).mean()

    df['hte_predicted'] = df_var['predicted'] #+ df_wo_var['predicted']
    return df
df = compute_var_effect(df, forest_model, var)
# plot
fig, ax = plt.subplots(figsize=(10, 4), dpi=300)
sns.scatterplot(x=var, y='hte_predicted', data=df, color='grey', alpha=1, s=30, ax=ax)
ax.set_ylabel('Predicted Treatment Effect', fontsize=16)
ax.set_xlabel(label_map.get(var, var), fontsize=16)
ax.tick_params(axis='both', labelsize=11)
plt.tight_layout()
plt.savefig(r'C:\Users\timba\OneDrive - Universität Bayreuth (1)\Uni\Research General\EVWL online test effectiveness assessment\Empirics\graphs\heterogeneous_treatment_effects_ects_same_semester.pdf', bbox_inches='tight',facecolor='white', edgecolor='none')
plt.show()

